import scrapy
from zuocai.items import ZuocaiItem


class CaiSpider(scrapy.Spider):
    """Crawl recipe pages on www.meishij.net and yield one ``ZuocaiItem`` each.

    The crawl has three stages:

    1. ``parse`` reads the category index in ``start_urls`` and schedules the
       first ``max_pages`` listing pages of every category link it finds.
    2. ``parse_third`` reads a listing page and schedules every recipe on it,
       forwarding the recipe title through ``meta``.
    3. ``parse_second`` reads a recipe page and yields the item.
    """

    name = 'cai'
    allowed_domains = ['www.meishij.net']
    # Change the entry URL here to crawl a different category index.
    start_urls = ['https://www.meishij.net/caipufenlei/']
    # Number of listing pages requested per category (pages 1..max_pages).
    # May be overridden from the command line with ``-a max_pages=N``; such
    # arguments arrive as strings, hence the int() conversion in ``parse``.
    max_pages = 5

    def parse(self, response):
        """Schedule the paginated listing pages of every category link.

        :param response: response for the category index page.
        :returns: generator of ``scrapy.Request`` objects handled by
            ``parse_third``.
        """
        page_count = int(self.max_pages)
        category_hrefs = response.xpath(
            '//div[@class="sort_main"]//ul/li//a/@href').extract()
        for href in category_hrefs:
            # urljoin copes with both relative and absolute hrefs, whereas
            # plain string concatenation with the site root breaks on the
            # latter.
            category_url = response.urljoin(href)
            # e.g. https://www.meishij.net/fenlei/xiawucha/
            for page in range(1, page_count + 1):
                yield scrapy.Request(
                    url=category_url + 'p{}/'.format(page),
                    callback=self.parse_third
                )

    def parse_third(self, response):
        """Schedule a request for every recipe found on a listing page.

        Titles and links are extracted with two separate queries and paired
        by position; pairing stops at the shorter of the two lists, so a
        listing page with mismatched counts no longer raises ``IndexError``.

        :param response: response for one category listing page.
        :returns: generator of ``scrapy.Request`` objects handled by
            ``parse_second``, each carrying ``meta['title']``.
        """
        titles = response.xpath(
            '//div[@class="list_s2_content"]'
            '//a[@class="list_s2_item_info"]/strong/text()').extract()
        hrefs = response.xpath(
            '//div[@class="list_s2_content"]'
            '//div[@class="imgw"]//a[1]/@href').extract()
        # zip yields nothing when either list is empty, so no explicit
        # emptiness check is needed.
        for title, href in zip(titles, hrefs):
            yield scrapy.Request(
                # Request rejects URLs without a scheme, so resolve the href
                # against the current page first.
                url=response.urljoin(href),
                callback=self.parse_second,
                meta={
                    'title': title
                }
            )

    def parse_second(self, response):
        """Extract the recipe fields and yield a ``ZuocaiItem``.

        An item is yielded only when all four extracted lists are non-empty;
        otherwise the page is skipped silently.

        :param response: response for one recipe page; ``meta['title']`` must
            hold the title captured on the listing page.
        :returns: generator yielding at most one ``ZuocaiItem``.
        """
        # The two ingredient queries select the <strong> elements themselves
        # (no /text()), so the extracted strings keep their HTML markup.
        # NOTE(review): the names suggest zhuliao = main ingredients,
        # fuliao = secondary ingredients, shoucang = favourites count --
        # confirm against the page layout.
        zhuliao = response.xpath(
            '//div[@class="recipe_ingredients"]'
            '//div[@class="right"]/strong').extract()
        fuliao = response.xpath(
            '//div[@class="recipe_ingredients recipe_ingredients1"]'
            '//div[@class="right"]/strong').extract()
        img_url = response.xpath(
            '//div[@class="recipe_topimgw"]/img/@src').extract()
        shoucang = response.xpath(
            '//div[@class="recipe_qrcodebox"]//a//em/text()').extract()
        if zhuliao and fuliao and img_url and shoucang:
            yield ZuocaiItem(
                title=response.meta['title'],
                zhuliao=zhuliao,
                fuliao=fuliao,
                img_url=img_url,
                shoucang=shoucang
            )
